home *** CD-ROM | disk | FTP | other *** search
- #!/usr/local/bin/perl
- # This program filters raw ls listings files to produce a file that archie
- # can parse. It started life as the filt program in the original archie
- # distribution and was converted to perl and modified by Amos Shapira.
- # Then significant modifications and improvements, as well as many hours of
- # testing, were done by Eric Anderson at SURAnet (eanders@sura.net).
- #
- # Please report any bugs or send any patches to archie-admin@sura.net
- # Also regular expression gurus should feel free to comment what various
- # undocumented sections of the code actually does.
- #
- # Notes:
- # Need to add to fixpermerrors a check for whatever stupid thing the site
- # is sticking onto the front.
- # SITES we do not handle yet:
- # eba.eb.ele.tue.nl
- # harvard.harvard.edu
- # biox.unibas.ch
- # garbo.uwasa.fi
- # inria.inria.fr
- # thumper.bellcore.com
- # Filter version 1.0.4
- $debuglevel=0;
- # Debuglevels:
- $\ = "\n"; # automatically add newline on print
- @legit = ("[dl-]","[r-]","[w-]","[xsS-]","[r-]","[w-]",
- "[xsS-]","[r-]","[w-]","[x-]",
- "(\\d| )","(\\d| )","(\\d| )","[ \\d\\w]","[ \\d\\w]");
- #for ($loop=
- # Legit patterns for chars in permission part of line.
-
- $fulllegit = join("",@legit);
- $aixlegit = join("","^[DF]",@legit[1..9]);
- $dirperms = join("",'d',@legit[1..9]);
- $permlegit = join("","^",@legit[0..9]);
- $idlegit = "^(\\w|\\d|-)+$";
- #print "'$dirperms'";
- $badchar = "[^ -~\\t]";
- $corruptpattern = join('',$badchar,".*",$badchar,".*",$badchar,".*");
- $filenamebegin= -1; # Automagically initialized on the first directory.
- $printedaline=0;
- $maxdiraddsize=40;
- $curdir="";
- undef @basedirs;
- $lastline="@@@beginning";
-
- for (;<>;) {
- chop;
- # Commented out study because it was making some stuff break, I don't
- # quite know why, below is code which breaks if it is below the study.
- # print "$_";
- # print "MOOOOF" if /$dirperms/o;
- # next;
- # study;
-
- die ("@@@ Belch -- Corrupted input?\n") if /$corruptpattern/o;
-
- # If any of these cases are true, the line is not printed.
-
- # Remove totaling lines
- next if (/^total[ \t]+\d+[ \t]*$/o);
- next if (/^Total:[ \t]+\d+[ \t]*kbytes$/o);
- # Something with opendir in it.
- next if (/^opendir:/o);# {
- # Toss we are in europe lines
- next if (/WE ARE IN EUROPE/o);
- # Don't print character or block devices.
- next if (/[cb][-rwxSsTt]{9}/o);
- # Chuck lines having that pattern in them.
- next if (/can not access/o);
- # Chuck lines as seen below.
- next if (/stale nfs file handle/io);
- # Another bizarre case
- next if (/^\.:?$/o);
- # Throw away leading blank lines
- next if (/^$/ && !$printedaline);
- # I wonder what this does.
- next if (/^[ \t]/o);
- # Throw away lines containing unreadable.
- next if (/unreadable/o);
- # Throw away lines which have : no /dev/zero at the end of them
- next if (/: no \/dev\/zero$/o);
- # Throw away lines which have No such file or directory in them
- next if (/No such file or directory$/o);
- # Throw away lines with crt0: no /usr/lib/ld.so -- for sparta.spartacus.com
- next if (/^crt0: no \/usr\/lib\/ld.so$/o);
- # Throw away short lines which aren't blank and aren't directories.
- # First seen on cs.tut.fi
- next if ((length($_)<10)&&(!/:$/o)&&(!/^$/o)&&!(/^\.|\//o));
- next if ((length($_)<$filenamebegin) && /^$fulllegit/o &&
- !(/Permission denied/o||/not found/o||/cannot access/o));
- # Throw away ld.so warnings
- next if (/^ld.so: warning: /o);
- # Throw away more ld.so errors
- next if (/^ld.so: map heap error \(22\) for \/dev\/zero/o);
- # Throw away the line ${org}: for eba.eb.ele.tue.nl
- next if (/^\$\{org\}\:$/o);
- # Throw away lines with connection timed out for ftp.informatik.rwth-aachen.de
- next if (/^ls:.*Connection timed out$/o);
- # Throw away this line for aix370.rrz.uni-koeln.de
- next if (/^\.disk1\:$/o);
- # Remove blank lines which precede filename entries so that enter doesn't
- # think they are supposed to be directory names.
- if (/^$/o) {
- $_ = <STDIN>;
- # print STDERR "$_"; # ***
- last if !defined $_;
- chop;
- if (!/^$fulllegit/o) {
- print "";
- $lastline = "";
- $_ .= " "; # For the chop to eat.
- redo;
- }
- }
-
- if ($filenamebegin<0&&/^$dirperms/o) {
- $filenamebegin = length($_);
- do {
- --$filenamebegin;
- die ("filenamebegin dropped too much??") if $filenamebegin<20;
- } until ((substr($_,$filenamebegin,1) eq " ")&&
- (substr($_,$filenamebegin-2,1) =~ /\d/o));
- ++$filenamebegin;
- }
- # Make sure we don't get stuck in a loop.
- $count=0;
- $start=$_;
- # This forces idempotency. the loop that is.
- do {
- ++$count;
- die("@@@ iterated for a long time on \n'$start'\n, never got done.\n") if $count>50;
- # warn("@@@ iterations:$count") if $count>2;
- $orig = $_;
- # Try to put : after dir names in listing.
- # bin:
- # files
-
- if ((/^\./o || /^\//o) && /\w$/o && !/Permission denied/o) {
- print "";
- $lastline = "";
- $_ = "$_:";
- }
- # Dump a return in if the last line was all printable chars with a colon
- # on the end, e.g. a directory, and the last line was for real.
- if (!/^$fulllegit/o) {
- print "" if (/^[\w\/+#-\.]+:$/o && $lastline);
- # Remove an extra color from a directory name entry if it exists.
- # For wuarchive.wustl.edu
- s/::$/:/o;
- }
-
- #General cleanup
- # Hack out garbage people put on front of listings.
- if (/^\//o || /^\./o) {
- s!^\./!!o;
- s!^/usr/spool/ftp/!!o;
- s!^/pub/!!o;
- s!^/usr/local/pub/!!o;
- s!^/home/ftp/pub/!!o;
- s!^/ftp/pub/!!o;
- s!^/com/ftp/pub/!!o;
- s!^/var/spool/uucppublic/!!o;
- s!^/com/ftp/sun4/pub/!!o;
- s!^/users/ftp/!!o;
- s!^.disk1/!!o; # For aix370.rrz.uni-koeln.de
- }
-
- # What's this do?
- # s/^([-dl][-rwxSsTt]{9}.*)(\\$)/$1/o;
- # s/^([-dl][-rwxSsTt]{9})(\d+)/$1 $2/o;
- # Take out trailing / from directory listing
- s/^(d.*)\/$/$1/o;
- # Take :'s off the end of lines which aren't really directories names.
- # Why would I want to do this? Note I still do.
- # Do this so that the next line will work right.
- s/^($fulllegit.*)\:$/$1/o;
- # Hack for walhalla.informatik.uni-dortmund.de, user/group names with
- # spaces in them.
- s/NOT FTP/NOT_FTP/go;
- # Hack for gargoyle.uchicago.edu, to fix directory with return in the name
- if (/^pub\/emwq\/Mailboxes.*h$/o) {
- $_ .= ":";
- $foo = <STDIN>;
- }
- # Two hacks for eba.eb.ele.tue.nl
- if (/^l.*local.$/o) {
- s/^l(.*local)./d$1/o;
- }
- if (/^pub\/apollo\/local\/News\:$/o) {
- print "pub/apollo/local:";
- print "drwxrwxrwx 1 news 15 Mar 15 12:00 News";
- print "";
- }
-
- #Put space between permissions and id.
- if (/^[ld-][r-][w-][x-]/o && /^..........\d/o) {
- s/^(..........)/$1 /o;
- if (substr($_,$filenamebegin-1,2) eq ' ') {
- substr($_,$filenamebegin-1,2) = ' ';
- }
- }
-
- #Fix AIX bogosity
- if (/$aixlegit/o) {
- # Aix ls follows directory symlinks.
- s/ \-\> .*$//o if (/^D/o && / \-\> /o);
- s/^D/d/o;
- s/^F/-/o;
- }
- #Special hack for earth.rs.itd.umich.edu
- if (/^mac\.bin\/\.AppleDesktop\/_:$/o && !$hack'umich_edu) {
- # print STDERR "@Did hack for earth on $_."; # ***
- $hack'umich_edu=1;
- while(!/^$/o) {
- $_=<STDIN>;
- }
- }
-
- #Replace trailing spaces with underscores in directory listings.
- $spacepos=rindex($_," ");
- while (($spacepos>=$filenamebegin)
- &&((/ _*$/o)||
- (substr($_,$filenamebegin-6) =~ /^(\d| )\d(\d|\:)\d\d _* /o))) {
- # Roughly that regexp is time (13:45) or year ( 1990)
- s/ (_*)$/_$1/o;
- substr($_,$filenamebegin-6) =~
- s/^(..... )(_*) /$1$2_/o;
- $spacepos=rindex($_," ");
- }
- #Put in leading spaces for bogus stuff.
- #Ditto for the : terminated stuff.
- # Also fixup anthing like foo/bar /doobie:
- if (/:$/o) {
- s/ (_*(:$|\/))/_$1/o while (/ _*(:$|\/)/o);
- s/\/(_*) /\/_$1/o while (/\/_* /o);
- }
-
- #Complicated fixups.
- if (/Permission denied/o||/not found/o||/cannot access/o||
- /Connection timed out/o) {
- $_ = &fixlserrors($_);
- next if (! $_);
- }
- } until ($orig eq $_);
-
- if (/^$dirperms/o) { # && length($_)<$maxdiraddsize) {
- $dirname=$curdir . substr($_,$filenamebegin);
- if (length($dirname) <$maxdiraddsize) {
- push(@basedirs,$dirname);
- # print STDERR "Adding '",$dirname,"' to dir list";
- }
- }
- $curdir = substr($_,0,length($_)-1)."/" if (/:$/o);
-
- $lastline = $_;
- print;
- $printedaline=1;
- }
-
- sub fixlserrors {
- local ($_) = @_;
- local ($first);
- local ($count);
-
- # print STDERR "Enter FixLsErrors";
- # return "" if /^ls.*denied$/o;
- return "" if /^(\/bin\/)?ls.*denied$/o;
- # return "" if /^ls.*not found$/o;
- return "" if /^(\/bin\/)?ls.*not found$/o;
- return "" if /^cannot access /o;
- return "" if /^lost\+found: Permission denied$/o;
- # $foodebug=1 if $_ =~ /tesol: Per/;
- $first = &fixpermline($_);
- # print STDERR "*1$first" if $foodebug;
- $count = 0;
- $_ = undef;
- while (!$_) {
- # print STDERR "Hi";
- ++$count;
- die ("@@@ FixLsErrors iterated too long :$count\n") if $count>200;
- $_ = <STDIN>;
- last if !defined($_);
- # print STDERR "*a$count,$first,$_"; #***
- # print STDERR "'$_'" if defined $_;
- chop ;
- last if /^$/o;
- # print STDERR "*b$count,$first,$_"; #***
- if (/denied$/o) {
- $_ = "";
- } else {
- $_ = &fixpermline($_);
- }
- # print STDERR "*c$count,$first,$_"; #***
- }
- # print STDERR "*d$first" if $foodebug;
- # print STDERR "@*$first,$_" if /pleD/o;
- # print STDERR "*e$first" if $foodebug;
- $_= $first . $_;
- # print STDERR "*&&$_" if $foodebug;
- return $_ if /^$fulllegit/o;
- return $_ if /^.*:$/o;
- return "";
- }
-
- sub fixpermline {
- local ($_) = @_;
- local ($count);
-
- # print "@$_@";
- # sys13 stuff for potemkin.cs.pdx.edu
- s/(\/bin\/)?ls\s*:.*denied( \(sys13\))?$//o;
- s/(\/bin\/)?ls\s*:.*not found$//o;
- s/\.\/.*not found$//o;
- s/cannot access .*$//o;
- s/\.\/.*Connection timed out:$//o;
- # print STDERR "*$_*";
- return $_ if !(/Permission denied/o || /not found/o || /cannot access/o ||
- /Connection timed out/o);
-
- $rightmost=0;
- $longlen=0;
- foreach $elem (@basedirs) {
- # if ($foodebug&&rindex($_,$elem)>=0) {
- # print STDERR "@$elem";
- # print STDERR "@$elem,", rindex($_,$elem);
- # print STDERR "@", rindex($_,"mac/incoming");
- # }
- if (0<=($foo=rindex($_,$elem))) {
- $bar = length($elem);
- # print STDERR "@@$foo,$rightmost,$bar,$longlen,", $rightmost-$bar-1;
- if ($foo>=($rightmost-$bar-1)) {
- # Backup by at most by the length of the current one.
- # Plus a /
- $longlen=$bar;
- $rightmost=$foo;
- # print STDERR "!$_,$elem,$longlen,$rightmost" if /incoming\/pal/;
- }
- }
- }
- # if ($rightmost>0&&$foodebug) {
- # print STDERR "Found:", substr($_,$rightmost);
- # print STDERR "Returning:" , substr($_,0,$rightmost);
- # }
- if (/$fulllegit/o) {
- $_ = substr($_,0,$rightmost);
- } else {
- return substr($_,0,$rightmost) if (($rightmost>0) &&
- !(substr($_,$rightmost-1,1) eq "/"));
- # print STDERR "nope";
- }
-
- local($acc,$orig) = ("",$_);
- local(@line) = split(//o,$_);
- local($x,$m);
- local(@legitcopy) = @legit;
-
- $count=0;
- do {
- ++$count;
- die ("@@@ fixpermline iterated too long:$count\n") if $count>50;
- $_ = shift @line;
- $m = shift @legitcopy;
- # print STDERR "#$m#$_#$acc";
- return $acc if !/$m/; # Don't put the o here, this changes.
- # print STDERR "##$acc";
- $acc .= $_;
- } until $#legitcopy==-1;
- $_=$orig;
- s/\:\s*Permission denied//o;
- s/\/(\w|\/)*\s*not found//o;
- return $_;
- }
-